In [3]:
# install calmap
! pip install calmap
! pip install lxml
Requirement already satisfied: calmap in ./opt/anaconda3/lib/python3.7/site-packages (0.0.8)
Requirement already satisfied: matplotlib in ./opt/anaconda3/lib/python3.7/site-packages (from calmap) (3.1.3)
Requirement already satisfied: pandas in ./opt/anaconda3/lib/python3.7/site-packages (from calmap) (1.0.3)
Requirement already satisfied: numpy in ./opt/anaconda3/lib/python3.7/site-packages (from calmap) (1.18.1)
Requirement already satisfied: kiwisolver>=1.0.1 in ./opt/anaconda3/lib/python3.7/site-packages (from matplotlib->calmap) (1.2.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in ./opt/anaconda3/lib/python3.7/site-packages (from matplotlib->calmap) (2.4.6)
Requirement already satisfied: python-dateutil>=2.1 in ./opt/anaconda3/lib/python3.7/site-packages (from matplotlib->calmap) (2.8.1)
Requirement already satisfied: cycler>=0.10 in ./opt/anaconda3/lib/python3.7/site-packages (from matplotlib->calmap) (0.10.0)
Requirement already satisfied: pytz>=2017.2 in ./opt/anaconda3/lib/python3.7/site-packages (from pandas->calmap) (2019.3)
Requirement already satisfied: six>=1.5 in ./opt/anaconda3/lib/python3.7/site-packages (from python-dateutil>=2.1->matplotlib->calmap) (1.14.0)
Requirement already satisfied: lxml in ./opt/anaconda3/lib/python3.7/site-packages (4.5.2)

Importation des bibliothèques

In [4]:
# essential libraries
import json
import random
from urllib.request import urlopen
import requests
import lxml.html as lh

# storing and analysis
import numpy as np
import pandas as pd


# visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
import calmap
import folium
import seaborn as sns

# offline plotly visualization
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly as py
import plotly.graph_objs as go
init_notebook_mode(connected=True) 

# colour palette shared by the plots (hex RGB strings)
tpc = '#393e46' # total positive (confirmed) cases - dark grey
dth = '#ff2e63' # deaths - red
rec = '#21bf73' # recovered - green
act = '#fe9801' # active cases - orange
hos = '#d2691e' # hospitalized cases - brown

# register pandas' date converters with matplotlib
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()   

# hide warnings
# NOTE(review): this silences every warning for the whole notebook, including
# pandas deprecation warnings -- consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

def _load_geojson(url, timeout=30):
    """Download `url` and return its body parsed as JSON.

    Parameters
    ----------
    url : str
        Address of a GeoJSON (or any JSON) document.
    timeout : float, optional
        Seconds to wait for the server before giving up, so an unreachable
        host cannot hang the notebook indefinitely.

    Returns
    -------
    The decoded JSON document (a dict for GeoJSON feature collections).
    """
    with urlopen(url, timeout=timeout) as response:
        return json.load(response)


# gathering the geojson for Italian Regions
regions = _load_geojson('https://gist.githubusercontent.com/datajournalism-it/48e29e7c87dca7eb1d29/raw/2636aeef92ba0770a073424853f37690064eb0ea/regioni.geojson')

# gathering the geojson for Italian Provinces
provinces = _load_geojson('https://raw.githubusercontent.com/openpolis/geojson-italy/master/geojson/limits_IT_provinces.geojson')

Dataset

Regional Data

Description des variables

  • SNo: Numéro de série

  • Date: Date de notification au format AAAA-MM-JJTHH: MM: SS (ISO 8601)

  • Country: Pays au format XYZ (ISO 3166-1 alpha-3)

  • RegionCode: Code de la région (ISTAT 2019)

  • RegionName: Nom de la région

  • Latitude: Latitude par région

  • Longitude: Longitude par région

  • HospitalizedPatients: Patients hospitalisés présentant des symptômes, non en soins intensifs

  • IntensiveCarePatients: Patients en soins intensifs

  • TotalHospitalizedPatients: Total des patients hospitalisés (patients hospitalisés + patients en soins intensifs)

  • HomeConfinement: Les personnes en quarantaine par confinement à domicile

  • CurrentPositiveCases: Nombre total de cas positifs actuels (total des patients hospitalisés + personnes en confinement à domicile)

  • NewPositiveCases: Nombre de nouveaux cas positifs (variation du total des cas positifs par rapport au jour précédent)

  • Recovered: Nombre de cas récupérés

  • Deaths: Nombre de décès

  • TotalPositiveCases: Nombre total de cas positifs

  • TestsPerformed: Nombre de tests effectués

In [5]:
# Load the regional dataset; the CSV's own header row is replaced by the
# explicit column names below.
region_columns = [
    'SNo', 'Date', 'Country', 'RegionCode', 'RegionName',
    'Latitude', 'Longitude',
    'HospitalizedPatients', 'IntensiveCarePatients', 'TotalHospitalizedPatients',
    'HomeConfinement', 'CurrentPositiveCases', 'NewPositiveCases',
    'Recovered', 'Deaths', 'TotalPositiveCases', 'TestsPerformed',
]
Data_Byregion = pd.read_csv(
    r"/Users/yassinelatif/Desktop/Project.lsd/covid19_italy_region.csv",
    names=region_columns,
    header=0,
    index_col=False,
)

# Parse the timestamps, then normalise the spelling of Emilia-Romagna.
Data_Byregion = Data_Byregion.assign(Date=pd.to_datetime(Data_Byregion['Date']))
Data_Byregion = Data_Byregion.replace("Emilia Romagna", "Emilia-Romagna")
Data_Byregion.head()
Out[5]:
SNo Date Country RegionCode RegionName Latitude Longitude HospitalizedPatients IntensiveCarePatients TotalHospitalizedPatients HomeConfinement CurrentPositiveCases NewPositiveCases Recovered Deaths TotalPositiveCases TestsPerformed
0 0 2020-02-24 18:00:00 ITA 13 Abruzzo 42.351222 13.398438 0 0 0 0 0 0 0 0 0 NaN
1 1 2020-02-24 18:00:00 ITA 17 Basilicata 40.639471 15.805148 0 0 0 0 0 0 0 0 0 NaN
2 2 2020-02-24 18:00:00 ITA 18 Calabria 38.905976 16.594402 0 0 0 0 0 0 0 0 0 NaN
3 3 2020-02-24 18:00:00 ITA 15 Campania 40.839566 14.250850 0 0 0 0 0 0 0 0 0 NaN
4 4 2020-02-24 18:00:00 ITA 8 Emilia-Romagna 44.494367 11.341721 10 2 12 6 18 18 0 0 18 NaN
In [6]:
# dataframe info: dtype and non-null count of every column
Data_Byregion.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3969 entries, 0 to 3968
Data columns (total 17 columns):
 #   Column                     Non-Null Count  Dtype         
---  ------                     --------------  -----         
 0   SNo                        3969 non-null   int64         
 1   Date                       3969 non-null   datetime64[ns]
 2   Country                    3969 non-null   object        
 3   RegionCode                 3969 non-null   int64         
 4   RegionName                 3969 non-null   object        
 5   Latitude                   3969 non-null   float64       
 6   Longitude                  3969 non-null   float64       
 7   HospitalizedPatients       3969 non-null   int64         
 8   IntensiveCarePatients      3969 non-null   int64         
 9   TotalHospitalizedPatients  3969 non-null   int64         
 10  HomeConfinement            3969 non-null   int64         
 11  CurrentPositiveCases       3969 non-null   int64         
 12  NewPositiveCases           3969 non-null   int64         
 13  Recovered                  3969 non-null   int64         
 14  Deaths                     3969 non-null   int64         
 15  TotalPositiveCases         3969 non-null   int64         
 16  TestsPerformed             2814 non-null   float64       
dtypes: datetime64[ns](1), float64(3), int64(11), object(2)
memory usage: 527.3+ KB
In [7]:
# count the missing values in each column
Data_Byregion.isna().sum()
Out[7]:
SNo                             0
Date                            0
Country                         0
RegionCode                      0
RegionName                      0
Latitude                        0
Longitude                       0
HospitalizedPatients            0
IntensiveCarePatients           0
TotalHospitalizedPatients       0
HomeConfinement                 0
CurrentPositiveCases            0
NewPositiveCases                0
Recovered                       0
Deaths                          0
TotalPositiveCases              0
TestsPerformed               1155
dtype: int64
In [8]:
# Scraper to create the dataframe with the population by region
url='https://www.tuttitalia.it/regioni/popolazione/'
page = requests.get(url, timeout=30)
page.raise_for_status()  # fail loudly instead of parsing an HTTP error page
doc = lh.fromstring(page.content)
tr_elements = doc.xpath('//tr')

# One (header text, values) pair per column, built from the first table row.
col = [(header_cell.text_content(), []) for header_cell in tr_elements[0]]

for row in tr_elements[1:]:
    # Data rows have exactly 7 cells; the first row of any other size ends the table.
    if len(row) != 7:
        break

    for i, cell in enumerate(row.iterchildren()):
        value = cell.text_content()
        if i > 0:
            # Store plain integers as int; any other text is kept unchanged.
            try:
                value = int(value)
            except ValueError:
                pass
        col[i][1].append(value)

Dict = {title: column for (title, column) in col}
pop_reg = pd.DataFrame(Dict)
# Keep only the second and third scraped columns: region name and population.
pop_reg = pop_reg.iloc[:, 1:3]
pop_reg.columns = ['RegionName', 'Population']

# Strip the '.' characters (presumably thousands separators -- confirm against
# the page) and convert to float. str() also covers values that were already
# converted to int above, which have no .translate/.replace method.
pop_reg['Population'] = pop_reg['Population'].map(
    lambda raw: float(str(raw).replace('.', ''))
)

Preprocessing

Cleaning Data Per Region

In [9]:
# peek at the last rows of the regional frame
Data_Byregion.tail()
Out[9]:
SNo Date Country RegionCode RegionName Latitude Longitude HospitalizedPatients IntensiveCarePatients TotalHospitalizedPatients HomeConfinement CurrentPositiveCases NewPositiveCases Recovered Deaths TotalPositiveCases TestsPerformed
3964 3964 2020-08-30 17:00:00 ITA 19 Sicilia 38.115697 13.362357 68 10 78 1036 1114 34 2891 286 4291 266061.0
3965 3965 2020-08-30 17:00:00 ITA 9 Toscana 43.769231 11.255889 48 5 53 1450 1503 98 9141 1141 11785 353231.0
3966 3966 2020-08-30 17:00:00 ITA 10 Umbria 43.106758 12.388247 10 1 11 251 262 31 1442 80 1784 95419.0
3967 3967 2020-08-30 17:00:00 ITA 2 Valle d'Aosta 45.737503 7.320149 2 0 2 21 23 2 1063 146 1232 17533.0
3968 3968 2020-08-30 17:00:00 ITA 5 Veneto 45.434905 12.338452 52 7 59 1286 1345 109 19399 2120 22864 607977.0
In [10]:
# boolean mask of the frame: True marks a missing value
Data_Byregion.isnull()
Out[10]:
SNo Date Country RegionCode RegionName Latitude Longitude HospitalizedPatients IntensiveCarePatients TotalHospitalizedPatients HomeConfinement CurrentPositiveCases NewPositiveCases Recovered Deaths TotalPositiveCases TestsPerformed
0 False False False False False False False False False False False False False False False False True
1 False False False False False False False False False False False False False False False False True
2 False False False False False False False False False False False False False False False False True
3 False False False False False False False False False False False False False False False False True
4 False False False False False False False False False False False False False False False False True
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3964 False False False False False False False False False False False False False False False False False
3965 False False False False False False False False False False False False False False False False False
3966 False False False False False False False False False False False False False False False False False
3967 False False False False False False False False False False False False False False False False False
3968 False False False False False False False False False False False False False False False False False

3969 rows × 17 columns

True signifie qu'il y a une valeur manquante dans cette colonne.

D'après la commande Data_Byregion.isnull().sum(), le nombre total de valeurs manquantes est de 1155 (toutes dans la colonne TestsPerformed).

Remplacer les valeurs manquantes par la médiane

In [11]:
# Impute the missing test counts with the integer part of the column median.
# The filled column is assigned back explicitly: calling fillna(inplace=True)
# on a column selection is not guaranteed to update the parent DataFrame.
# NOTE(review): the missing values sit on the earliest dates while the counts
# grow over time, so one global median may overstate early testing -- confirm
# this imputation is what the analysis needs.
median = int(Data_Byregion["TestsPerformed"].median())
Data_Byregion["TestsPerformed"] = Data_Byregion["TestsPerformed"].fillna(median)

Vérifier la colonne TestsPerformed

In [12]:
# inspect the TestsPerformed column after the median fill
Data_Byregion["TestsPerformed"]
Out[12]:
0        87426.0
1        87426.0
2        87426.0
3        87426.0
4        87426.0
          ...   
3964    266061.0
3965    353231.0
3966     95419.0
3967     17533.0
3968    607977.0
Name: TestsPerformed, Length: 3969, dtype: float64

Voilà : les valeurs manquantes ont été remplacées par la partie entière de la médiane, car un nombre de tests ne peut pas être fractionnaire.

In [13]:
# re-count the missing values per column after the fill
Data_Byregion.isnull().sum()
Out[13]:
SNo                          0
Date                         0
Country                      0
RegionCode                   0
RegionName                   0
Latitude                     0
Longitude                    0
HospitalizedPatients         0
IntensiveCarePatients        0
TotalHospitalizedPatients    0
HomeConfinement              0
CurrentPositiveCases         0
NewPositiveCases             0
Recovered                    0
Deaths                       0
TotalPositiveCases           0
TestsPerformed               0
dtype: int64
In [14]:
# preview the first rows after the fill
Data_Byregion.head()
Out[14]:
SNo Date Country RegionCode RegionName Latitude Longitude HospitalizedPatients IntensiveCarePatients TotalHospitalizedPatients HomeConfinement CurrentPositiveCases NewPositiveCases Recovered Deaths TotalPositiveCases TestsPerformed
0 0 2020-02-24 18:00:00 ITA 13 Abruzzo 42.351222 13.398438 0 0 0 0 0 0 0 0 0 87426.0
1 1 2020-02-24 18:00:00 ITA 17 Basilicata 40.639471 15.805148 0 0 0 0 0 0 0 0 0 87426.0
2 2 2020-02-24 18:00:00 ITA 18 Calabria 38.905976 16.594402 0 0 0 0 0 0 0 0 0 87426.0
3 3 2020-02-24 18:00:00 ITA 15 Campania 40.839566 14.250850 0 0 0 0 0 0 0 0 0 87426.0
4 4 2020-02-24 18:00:00 ITA 8 Emilia-Romagna 44.494367 11.341721 10 2 12 6 18 18 0 0 18 87426.0
In [15]:
# Active cases = confirmed cases minus deaths minus recoveries.
Data_Byregion['Active'] = (
    Data_Byregion['TotalPositiveCases']
    - Data_Byregion['Deaths']
    - Data_Byregion['Recovered']
)

# Names of the four case-count columns.
cases = ['TotalPositiveCases', 'Deaths', 'Recovered', 'Active']

Regroupement des data par region

In [16]:
# Column sets for the regional summaries. They are passed to groupby as lists:
# selecting several columns with a bare tuple (gb['a', 'b']) was deprecated in
# pandas 1.0 and is rejected by pandas 2.x.
summary_cols = ['TotalPositiveCases', 'Deaths', 'Recovered', 'Active']
detail_cols = summary_cols + ['TestsPerformed','HomeConfinement','HospitalizedPatients', 'IntensiveCarePatients', 'TotalHospitalizedPatients']

# latest: rows of the most recent reporting date
# (reset_index() keeps the original row labels in an 'index' column)
data = Data_Byregion[Data_Byregion['Date'] == Data_Byregion['Date'].max()].reset_index()

# latest condensed
data_grouped = data.groupby('RegionName')[summary_cols].sum().reset_index()

# latest condensed with data about swabs (tests), quarantine and hospitalization
data_grouped_moreinfo = data.groupby('RegionName')[detail_cols].sum().reset_index()

# Regional visualization adjustment: the two autonomous provinces are merged
# into Trentino-Alto Adige before summing.
dgm_2 = data.replace(["P.A. Bolzano", "P.A. Trento"], "Trentino-Alto Adige")
dgm_2 = dgm_2.groupby('RegionName')[detail_cols].sum().reset_index()

Province Data

In [17]:
# Load the province-level dataset, parsing the Date column as datetimes.
# NOTE(review): the same file is read again, with explicit column names, in the
# "importing datasets" cell further down -- this first read looks redundant.
Data_Byprovince = pd.read_csv(r"/Users/yassinelatif/Desktop/Project.lsd/covid19_italy_province.csv", parse_dates=["Date"])

Variables Description

  • SNo: Serial Number

  • Date: Date of Notification in format YYYY-MM-DDTHH:MM:SS (ISO 8601)

  • Country: Country in format XYZ (ISO 3166-1 alpha-3)

  • RegionCode: Code of the Region (ISTAT 2019)

  • Longitude : Longitude par province

  • RegionName: Name of the Region

  • ProvinceCode: Code de la province (ISTAT 2019)

  • ProvinceName: Nom de la province

  • ProvinceAbbreviation: Province abrégée (2 lettres)

  • Latitude : Latitude par province

  • Longitude : Longitude par province

  • TotalPositiveCases: Nombre total de cas positifs par province

In [18]:
# Load the province-level dataset; the CSV's own header row is replaced by the
# explicit column names below and Date is parsed as a datetime.
province_columns = [
    'SNo', 'Date', 'Country', 'RegionCode', 'RegionName',
    'ProvinceCode', 'ProvinceName', 'ProvinceAbbreviation',
    'Latitude', 'Longitude', 'TotalPositiveCases',
]
Data_Byprovince = pd.read_csv(
    r"/Users/yassinelatif/Desktop/Project.lsd/covid19_italy_province.csv",
    parse_dates=["Date"],
    names=province_columns,
    header=0,
    index_col=False,
)
Data_Byprovince.head()
Out[18]:
SNo Date Country RegionCode RegionName ProvinceCode ProvinceName ProvinceAbbreviation Latitude Longitude TotalPositiveCases
0 0 2020-02-24 18:00:00 ITA 13 Abruzzo 66 L'Aquila AQ 42.351222 13.398438 0
1 1 2020-02-24 18:00:00 ITA 13 Abruzzo 67 Teramo TE 42.658918 13.704400 0
2 2 2020-02-24 18:00:00 ITA 13 Abruzzo 68 Pescara PE 42.464584 14.213648 0
3 3 2020-02-24 18:00:00 ITA 13 Abruzzo 69 Chieti CH 42.351032 14.167546 0
4 4 2020-02-24 18:00:00 ITA 13 Abruzzo 979 In fase di definizione/aggiornamento NaN NaN NaN 0
In [19]:
# dataframe info: dtype and non-null count of every column
Data_Byprovince.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25599 entries, 0 to 25598
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   SNo                   25599 non-null  int64         
 1   Date                  25599 non-null  datetime64[ns]
 2   Country               25599 non-null  object        
 3   RegionCode            25599 non-null  int64         
 4   RegionName            25599 non-null  object        
 5   ProvinceCode          25599 non-null  int64         
 6   ProvinceName          25599 non-null  object        
 7   ProvinceAbbreviation  20034 non-null  object        
 8   Latitude              20223 non-null  float64       
 9   Longitude             20223 non-null  float64       
 10  TotalPositiveCases    25599 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(4), object(4)
memory usage: 2.1+ MB

Cleaning Data Per Province

In [20]:
# peek at the last rows of the province frame
Data_Byprovince.tail()
Out[20]:
SNo Date Country RegionCode RegionName ProvinceCode ProvinceName ProvinceAbbreviation Latitude Longitude TotalPositiveCases
25594 25594 2020-08-30 17:00:00 ITA 5 Veneto 27 Venezia VE 45.434905 12.338452 3238
25595 25595 2020-08-30 17:00:00 ITA 5 Veneto 28 Padova PD 45.406930 11.876087 4710
25596 25596 2020-08-30 17:00:00 ITA 5 Veneto 29 Rovigo RO 45.071073 11.790070 535
25597 25597 2020-08-30 17:00:00 ITA 5 Veneto 899 Fuori Regione / Provincia Autonoma NaN NaN NaN 370
25598 25598 2020-08-30 17:00:00 ITA 5 Veneto 999 In fase di definizione/aggiornamento NaN NaN NaN 48
In [21]:
# boolean mask of the frame: True marks a missing value
Data_Byprovince.isnull()
Out[21]:
SNo Date Country RegionCode RegionName ProvinceCode ProvinceName ProvinceAbbreviation Latitude Longitude TotalPositiveCases
0 False False False False False False False False False False False
1 False False False False False False False False False False False
2 False False False False False False False False False False False
3 False False False False False False False False False False False
4 False False False False False False False True True True False
... ... ... ... ... ... ... ... ... ... ... ...
25594 False False False False False False False False False False False
25595 False False False False False False False False False False False
25596 False False False False False False False False False False False
25597 False False False False False False False True True True False
25598 False False False False False False False True True True False

25599 rows × 11 columns

In [22]:
# count the missing values in each column
Data_Byprovince.isnull().sum()
Out[22]:
SNo                        0
Date                       0
Country                    0
RegionCode                 0
RegionName                 0
ProvinceCode               0
ProvinceName               0
ProvinceAbbreviation    5565
Latitude                5376
Longitude               5376
TotalPositiveCases         0
dtype: int64

True signifie qu'il y a une valeur manquante dans cette colonne.

D'après la commande Data_Byprovince.isnull().sum(), le nombre total de valeurs manquantes est de 16317 (5565 + 5376 + 5376).

Remplacer les valeurs manquantes de Latitude et Longitude par la médiane, et les NaN par 'ID' dans la colonne ProvinceAbbreviation

In [23]:
# Fill the missing latitudes with the column median.
# The filled column is assigned back explicitly: calling fillna(inplace=True)
# on a column selection is not guaranteed to update the parent DataFrame.
# NOTE(review): in the rows displayed earlier, the entries without coordinates
# are the unassigned ones ("In fase di definizione/aggiornamento",
# "Fuori Regione / Provincia Autonoma"); the median places them at an
# artificial location -- confirm that is acceptable.
median = Data_Byprovince["Latitude"].median()
Data_Byprovince["Latitude"] = Data_Byprovince["Latitude"].fillna(median)
In [24]:
# Fill the missing longitudes with the column median.
# The filled column is assigned back explicitly: calling fillna(inplace=True)
# on a column selection is not guaranteed to update the parent DataFrame.
median = Data_Byprovince["Longitude"].median()
Data_Byprovince["Longitude"] = Data_Byprovince["Longitude"].fillna(median)
In [25]:
# Give rows without a province abbreviation the placeholder code "ID".
# Assigned back explicitly rather than relying on fillna(inplace=True) applied
# to a column selection.
Data_Byprovince["ProvinceAbbreviation"] = Data_Byprovince["ProvinceAbbreviation"].fillna("ID")
In [26]:
# re-count the missing values per column after the fills
Data_Byprovince.isnull().sum()
Out[26]:
SNo                     0
Date                    0
Country                 0
RegionCode              0
RegionName              0
ProvinceCode            0
ProvinceName            0
ProvinceAbbreviation    0
Latitude                0
Longitude               0
TotalPositiveCases      0
dtype: int64
In [27]:
# preview the first rows after the fills
Data_Byprovince.head()
Out[27]:
SNo Date Country RegionCode RegionName ProvinceCode ProvinceName ProvinceAbbreviation Latitude Longitude TotalPositiveCases
0 0 2020-02-24 18:00:00 ITA 13 Abruzzo 66 L'Aquila AQ 42.351222 13.398438 0
1 1 2020-02-24 18:00:00 ITA 13 Abruzzo 67 Teramo TE 42.658918 13.704400 0
2 2 2020-02-24 18:00:00 ITA 13 Abruzzo 68 Pescara PE 42.464584 14.213648 0
3 3 2020-02-24 18:00:00 ITA 13 Abruzzo 69 Chieti CH 42.351032 14.167546 0
4 4 2020-02-24 18:00:00 ITA 13 Abruzzo 979 In fase di definizione/aggiornamento ID 43.715532 12.104734 0

Regroupement des data par Province

In [28]:
# latest: keep only the rows of the most recent reporting date.
# drop=True discards the old row labels; because this cell overwrites its own
# input, a plain reset_index() would add another leftover index column every
# time the cell is re-run.
Data_Byprovince = Data_Byprovince[Data_Byprovince['Date'] == Data_Byprovince['Date'].max()].reset_index(drop=True)

# latest condensed: total positive cases per province name
data_grouped_province = Data_Byprovince.groupby('ProvinceName')['TotalPositiveCases'].sum().reset_index()

Visualisation Des Données Par Region

In [30]:
# Regions most affected by COVID-19 (latest snapshot), shaded by magnitude.
# The columns are selected with a list: a bare tuple of column names after
# groupby was deprecated in pandas 1.0 and is rejected by pandas 2.x.
temp = data.groupby(['RegionName'])[['TotalPositiveCases', 'Deaths', 'Recovered','Active']].max()
temp.style.background_gradient(cmap='Reds')
Out[30]:
TotalPositiveCases Deaths Recovered Active
RegionName
Abruzzo 3773 472 2872 429
Basilicata 524 28 408 88
Calabria 1477 97 1148 232
Campania 6882 445 4412 2025
Emilia-Romagna 31805 4459 24478 2868
Friuli Venezia Giulia 3764 348 3056 360
Lazio 11043 878 7130 3035
Liguria 10907 1571 8827 509
Lombardia 99940 16863 76248 6829
Marche 7238 987 5949 302
Molise 525 23 432 70
P.A. Bolzano 2932 292 2450 190
P.A. Trento 5092 405 4600 87
Piemonte 32844 4146 27293 1405
Puglia 5402 556 4029 817
Sardegna 2114 134 1268 712
Sicilia 4291 286 2891 1114
Toscana 11785 1141 9141 1503
Umbria 1784 80 1442 262
Valle d'Aosta 1232 146 1063 23
Veneto 22864 2120 19399 1345
In [31]:
# National totals for the latest reporting date.
# The columns are selected with a list: a bare tuple of column names after
# groupby was deprecated in pandas 1.0 and is rejected by pandas 2.x.
temp = data.groupby('Date')[['TotalPositiveCases', 'Deaths', 'Recovered','Active']].sum().reset_index()
temp = temp[temp['Date']==temp['Date'].max()].reset_index(drop=True)
temp.style.background_gradient(cmap='Pastel1')
Out[31]:
Date TotalPositiveCases Deaths Recovered Active
0 2020-08-30 17:00:00 268218 35477 208536 24205
In [32]:
# Reshape the totals to long form (one row per case type) and draw a treemap.
case_columns = ['TotalPositiveCases', 'Deaths', 'Recovered','Active']
tm = temp.melt(id_vars="Date", value_vars=case_columns)
fig = px.treemap(
    tm,
    path=["variable"],
    values="value",
    height=400,
    width=600,
    color_discrete_sequence=[tpc, dth, rec, act],
)
fig.show()

Region-wise Data

Confirmed, Deaths, Recovered cases by Region

In [33]:
# Regional summary ordered from most to fewest confirmed cases.
temp_f = (
    data_grouped
    .sort_values(by='TotalPositiveCases', ascending=False)
    .reset_index(drop=True)
)
temp_f.style.background_gradient(cmap='Reds')
Out[33]:
RegionName TotalPositiveCases Deaths Recovered Active
0 Lombardia 99940 16863 76248 6829
1 Piemonte 32844 4146 27293 1405
2 Emilia-Romagna 31805 4459 24478 2868
3 Veneto 22864 2120 19399 1345
4 Toscana 11785 1141 9141 1503
5 Lazio 11043 878 7130 3035
6 Liguria 10907 1571 8827 509
7 Marche 7238 987 5949 302
8 Campania 6882 445 4412 2025
9 Puglia 5402 556 4029 817
10 P.A. Trento 5092 405 4600 87
11 Sicilia 4291 286 2891 1114
12 Abruzzo 3773 472 2872 429
13 Friuli Venezia Giulia 3764 348 3056 360
14 P.A. Bolzano 2932 292 2450 190
15 Sardegna 2114 134 1268 712
16 Umbria 1784 80 1442 262
17 Calabria 1477 97 1148 232
18 Valle d'Aosta 1232 146 1063 23
19 Molise 525 23 432 70
20 Basilicata 524 28 408 88

Regions with deaths reported

In [34]:
# Regions that reported at least one death, ordered by death count.
has_deaths = temp_f['Deaths'] > 0
temp_dg = temp_f.loc[has_deaths, ['RegionName', 'Deaths']]
(
    temp_dg
    .sort_values('Deaths', ascending=False)
    .reset_index(drop=True)
    .style.background_gradient(cmap='Reds')
)
Out[34]:
RegionName Deaths
0 Lombardia 16863
1 Emilia-Romagna 4459
2 Piemonte 4146
3 Veneto 2120
4 Liguria 1571
5 Toscana 1141
6 Marche 987
7 Lazio 878
8 Puglia 556
9 Abruzzo 472
10 Campania 445
11 P.A. Trento 405
12 Friuli Venezia Giulia 348
13 P.A. Bolzano 292
14 Sicilia 286
15 Valle d'Aosta 146
16 Sardegna 134
17 Calabria 97
18 Umbria 80
19 Basilicata 28
20 Molise 23

Regions with no cases reported as recovered

In [35]:
# Regions whose recovered count is still zero.
no_recovered = temp_f['Recovered'] == 0
temp = temp_f.loc[no_recovered, ['RegionName', 'TotalPositiveCases', 'Deaths', 'Recovered']]
(
    temp
    .reset_index(drop=True)
    .style.background_gradient(cmap='Reds')
)
Out[35]:
RegionName TotalPositiveCases Deaths Recovered

Le tableau est vide : on en déduit qu'aucune région n'a zéro guérison, c'est-à-dire que toutes les régions ont déclaré des cas guéris.

Regions with Recovered reported

In [36]:
# Regions that reported at least one recovered case, ordered by recoveries.
# The filter tests 'Recovered' (the copied cell tested 'Deaths'), so it matches
# what this section announces.
temp_dg = temp_f[temp_f['Recovered']>0][['RegionName', 'Recovered']]
temp_dg.sort_values('Recovered', ascending=False).reset_index(drop=True).style.background_gradient(cmap='Greens')
Out[36]:
RegionName Recovered
0 Lombardia 76248
1 Piemonte 27293
2 Emilia-Romagna 24478
3 Veneto 19399
4 Toscana 9141
5 Liguria 8827
6 Lazio 7130
7 Marche 5949
8 P.A. Trento 4600
9 Campania 4412
10 Puglia 4029
11 Friuli Venezia Giulia 3056
12 Sicilia 2891
13 Abruzzo 2872
14 P.A. Bolzano 2450
15 Umbria 1442
16 Sardegna 1268
17 Calabria 1148
18 Valle d'Aosta 1063
19 Molise 432
20 Basilicata 408
In [37]:
# Regions where every confirmed case has ended in death or recovery.
closed_cases = data_grouped['Deaths'] + data_grouped['Recovered']
temp = (
    data_grouped
    .loc[data_grouped['TotalPositiveCases'] == closed_cases,
         ['RegionName', 'TotalPositiveCases', 'Deaths', 'Recovered']]
    .sort_values('TotalPositiveCases', ascending=False)
    .reset_index(drop=True)
)
temp.style.background_gradient(cmap='Greens')
Out[37]:
RegionName TotalPositiveCases Deaths Recovered
  • Le tableau est vide : on en conclut qu'il n'existe aucune région où tous les cas sont clos (guéris ou décédés) ; chaque région compte encore des cas actifs.

Further data about swabs, domestic quarantine and hospitalization

By Region

In [38]:
# Detailed regional summary (tests, quarantine, hospitalisation), ordered from
# most to fewest confirmed cases.
temp_f = (
    data_grouped_moreinfo
    .sort_values(by='TotalPositiveCases', ascending=False)
    .reset_index(drop=True)
)

temp_f.style.background_gradient(cmap='Reds')
Out[38]:
RegionName TotalPositiveCases Deaths Recovered Active TestsPerformed HomeConfinement HospitalizedPatients IntensiveCarePatients TotalHospitalizedPatients
0 Lombardia 99940 16863 76248 6829 966973.000000 6615 194 20 214
1 Piemonte 32844 4146 27293 1405 359814.000000 1315 85 5 90
2 Emilia-Romagna 31805 4459 24478 2868 524656.000000 2758 99 11 110
3 Veneto 22864 2120 19399 1345 607977.000000 1286 52 7 59
4 Toscana 11785 1141 9141 1503 353231.000000 1450 48 5 53
5 Lazio 11043 878 7130 3035 466475.000000 2718 310 7 317
6 Liguria 10907 1571 8827 509 127057.000000 476 32 1 33
7 Marche 7238 987 5949 302 121344.000000 289 12 1 13
8 Campania 6882 445 4412 2025 233301.000000 1911 110 4 114
9 Puglia 5402 556 4029 817 213959.000000 695 117 5 122
10 P.A. Trento 5092 405 4600 87 82952.000000 83 3 1 4
11 Sicilia 4291 286 2891 1114 266061.000000 1036 68 10 78
12 Abruzzo 3773 472 2872 429 101829.000000 391 37 1 38
13 Friuli Venezia Giulia 3764 348 3056 360 152158.000000 344 15 1 16
14 P.A. Bolzano 2932 292 2450 190 72684.000000 180 8 2 10
15 Sardegna 2114 134 1268 712 114114.000000 681 28 3 31
16 Umbria 1784 80 1442 262 95419.000000 251 10 1 11
17 Calabria 1477 97 1148 232 152885.000000 215 17 0 17
18 Valle d'Aosta 1232 146 1063 23 17533.000000 21 2 0 2
19 Molise 525 23 432 70 32155.000000 67 3 0 3
20 Basilicata 524 28 408 88 55211.000000 86 1 1 2

Throughout Italy

In [39]:
# National totals per date, then the latest date only.
# The columns are selected with a list: a bare tuple of column names after
# groupby was deprecated in pandas 1.0 and is rejected by pandas 2.x.
national_cols = ['TotalPositiveCases', 'Deaths', 'Recovered', 'Active','HomeConfinement','TotalHospitalizedPatients','HospitalizedPatients', 'IntensiveCarePatients','TestsPerformed']
temp_f = Data_Byregion.groupby('Date')[national_cols].sum().reset_index()
temp_f = temp_f[temp_f['Date']==temp_f['Date'].max()].reset_index(drop=True)
temp_f.style.background_gradient(cmap='Pastel1')
Out[39]:
Date TotalPositiveCases Deaths Recovered Active HomeConfinement TotalHospitalizedPatients HospitalizedPatients IntensiveCarePatients TestsPerformed
0 2020-08-30 17:00:00 268218 35477 208536 24205 22868 1337 1251 86 5117788.000000

Maps

Across Italy - Regions

In [40]:
# Italy_Regions

def _region_tooltip(row):
    """Return the HTML tooltip describing one region row of `data`.

    The markup is a well-formed <ul> list: every <li> is closed and labels use
    <b> (the previous string closed a </ul> that was never opened and used the
    non-existent <bold> tag).
    """
    total = row['TotalPositiveCases']
    # Guard the rate against a region that has no confirmed case.
    mortality = round((row['Deaths'] / total) * 100, 2) if total else 0.0
    fields = [
        ('Country', row['Country']),
        ('RegionName', row['RegionName']),
        ('TotalPositiveCases', row['TotalPositiveCases']),
        ('Deaths', row['Deaths']),
        ('Recovered', row['Recovered']),
        ('Active', row['Active']),
    ]
    items = ''.join(
        '<li><b>{} : </b>{}</li>'.format(label, value) for label, value in fields
    )
    return (
        "<div style='margin: 0; background-color: black; color: white;'><ul>"
        + items
        + "<li>Taux de mortalite: " + str(mortality) + "</li>"
        + "</ul></div>"
    )


m_Regions = folium.Map(location=[41.8719, 12.5674],
               min_zoom=5, max_zoom=10, zoom_start=5)

# One circle per region of the latest snapshot.
for _, row in data.iterrows():
    folium.Circle(
        location=[row['Latitude'], row['Longitude']],
        color='crimson',
        fill=True,
        fill_color='crimson',
        tooltip=_region_tooltip(row),
        # the circle radius is the number of confirmed cases
        radius=int(row['TotalPositiveCases'])).add_to(m_Regions)
m_Regions.save('m_Regions.html')

m_Regions
Out[40]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [41]:
# Making sure the properties from the geojson include the region name

# first region name in the grouped data
print(data_grouped["RegionName"][0])

# properties of one GeoJSON feature (the region name is stored under 'NOME_REG')
print(regions["features"][3]["properties"])
Abruzzo
{'NOME_REG': 'Trentino-Alto Adige'}
In [42]:
# Choropleth of the total positive cases per region (dgm_2 has Trento and
# Bolzano merged into Trentino-Alto Adige).
region_cases = dgm_2['TotalPositiveCases']
cases_trace = go.Choroplethmapbox(
    geojson=regions,
    locations=dgm_2['RegionName'],
    featureidkey="properties.NOME_REG",
    z=region_cases,
    colorscale='matter',
    zmin=0,
    zmax=max(region_cases),
    marker_opacity=0.8,
    marker_line_width=0.1,
)
fig = go.Figure(cases_trace)
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=4,
    mapbox_center={"lat": 41.8719, "lon": 12.5674},
    margin={"r":0,"t":30,"l":0,"b":0},
    title='Total Positive Cases by Region',
)
fig.update_traces(showscale=True)
fig.show()
In [43]:
# Choropleth of the deaths per region (dgm_2 has Trento and Bolzano merged
# into Trentino-Alto Adige).
region_deaths = dgm_2['Deaths']
deaths_trace = go.Choroplethmapbox(
    geojson=regions,
    locations=dgm_2['RegionName'],
    featureidkey="properties.NOME_REG",
    z=region_deaths,
    colorscale='amp',
    zmin=0,
    zmax=max(region_deaths),
    marker_opacity=0.8,
    marker_line_width=0.1,
)
fig = go.Figure(deaths_trace)
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=4,
    mapbox_center={"lat": 41.8719, "lon": 12.5674},
    margin={"r":0,"t":30,"l":0,"b":0},
    title='Deaths by Region',
)
fig.update_traces(showscale=True)
fig.show()
In [201]:
# Animated bubble map of the confirmed cases per region over time.
# The columns are selected with a list: a bare tuple of column names after
# groupby was deprecated in pandas 1.0 and is rejected by pandas 2.x.
formated_gdf = Data_Byregion.groupby(['Date', 'RegionName'])[['Latitude','Longitude','TotalPositiveCases', 'Deaths']].max()
formated_gdf = formated_gdf.reset_index()
# The animation frames are keyed by the date rendered as MM/DD/YYYY text.
formated_gdf['Date'] = pd.to_datetime(formated_gdf['Date'])
formated_gdf['Date'] = formated_gdf['Date'].dt.strftime('%m/%d/%Y')
# Square root of the case count is used as the marker size.
formated_gdf['size'] = formated_gdf['TotalPositiveCases'].pow(0.5)

fig = px.scatter_mapbox(formated_gdf, lat="Latitude", lon="Longitude",
                     color="TotalPositiveCases", size='size', hover_name="RegionName", hover_data=['TotalPositiveCases','Deaths'],
                     color_continuous_scale='matter',
                     range_color= [0, max(formated_gdf['TotalPositiveCases'])+2],
                     animation_frame="Date", 
                     title='Spread over time')
fig.update(layout_coloraxis_showscale=True)
fig.update_layout(mapbox_style="carto-positron",
                  mapbox_zoom=4, mapbox_center = {"lat": 41.8719, "lon": 12.5674})
fig.update_layout(margin={"r":0,"t":30,"l":0,"b":0})
fig.show()

Across Italy - Provinces

Remarque: La carte ci-dessous montre uniquement les cas confirmés qui ont été attribués à une province dans l'ensemble de données et ne tient pas compte de ceux qui n'ont pas été attribués. Malheureusement, l'ensemble de données de la province ne rapporte que les cas confirmés sans autre classification.

In [45]:
# Total positive cases per province, drawn as a choropleth keyed on the
# GeoJSON "prov_istat_code_num" property.
# NOTE(review): this sums TotalPositiveCases over every row of a province; if
# the column is cumulative per date, confirm that a sum (rather than the
# latest value) is what is wanted here.
province_keys = ['ProvinceName', 'ProvinceCode']
temp = Data_Byprovince.groupby(province_keys)['TotalPositiveCases'].sum().reset_index()

province_layer = go.Choroplethmapbox(
    geojson=provinces,
    locations=temp['ProvinceCode'],
    featureidkey="properties.prov_istat_code_num",
    z=temp['TotalPositiveCases'],
    colorscale='matter',
    zmin=0,
    zmax=max(temp['TotalPositiveCases']),
    text=temp['ProvinceName'],
    hoverinfo='text+z',
    marker_opacity=0.8,
    marker_line_width=0.1,
)
fig = go.Figure(province_layer)
fig.update_layout(
    mapbox_style="carto-positron",
    mapbox_zoom=4,
    mapbox_center={"lat": 41.8719, "lon": 12.5674},
    margin={"r": 0, "t": 30, "l": 0, "b": 0},
)
fig.update_traces(showscale=True)
fig.update_layout(title='Total Positive Cases Cases by Province')
fig.show()

Evolution of total cases over time

In [92]:
# Nationwide daily totals, reshaped to long form (one row per date and case type).
# A list selects the columns; tuple selection on a groupby is deprecated since
# pandas 1.0 and rejected by later releases.
case_columns = ['Deaths', 'Recovered', 'Active', 'TotalPositiveCases']
temp = Data_Byregion.groupby('Date')[case_columns].sum().reset_index()
temp = temp.melt(id_vars="Date", value_vars=case_columns,
                 var_name='Case', value_name='Count')

fig = px.area(temp, x="Date", y="Count", color='Case',
             title='Cases over time', color_discrete_sequence = [dth, rec, act, tpc])
fig.show()

Recovery, mortality and hospitalization rate over time

Notez s'il vous plaît:

Il est très probable que les taux indiqués ci-dessous surestiment la létalité réelle du COVID-19, car le nombre réel de personnes infectées pourrait facilement être supérieur aux cas confirmés.

In [47]:
# Daily nationwide sums, then three ratios expressed per 100 confirmed cases.
temp = Data_Byregion.groupby('Date').sum().reset_index()

ratio_sources = {
    'No. of Deaths to 100 Confirmed Cases': 'Deaths',
    'No. of Recovered to 100 Confirmed Cases': 'Recovered',
    'No. of Hospitalized to 100 Confirmed Cases': 'TotalHospitalizedPatients',
}
for ratio_name, source_column in ratio_sources.items():
    temp[ratio_name] = round(temp[source_column] / temp['TotalPositiveCases'], 3) * 100

# Long form: one row per date and ratio.
temp = temp.melt(
    id_vars='Date',
    value_vars=list(ratio_sources),
    var_name='Ratio',
    value_name='Value',
)

fig = px.line(
    temp,
    x="Date",
    y="Value",
    color='Ratio',
    log_y=True,
    title='Recovery, Mortality and Hospitalization Rate Over The Time',
    color_discrete_sequence=[dth, rec, hos],
    height=800,
)
fig.update_layout(legend_orientation='h', legend_title='')
fig.show()

Nombre de régions dans lesquelles le COVID-19 s'est propagé

Remarque:

Comme mentionné précédemment, les provinces autonomes de Trente et Bolzano sont étiquetées comme des régions, de sorte que le nombre total de régions s'élève à 21.

In [226]:
# Number of distinct regions with a non-zero case count on each date.
regions_with_cases = Data_Byregion[Data_Byregion['TotalPositiveCases'] != 0]
reg_spread = (
    regions_with_cases.groupby('Date')['RegionName']
    .nunique(dropna=False)
    .reset_index()
)

fig = px.line(
    reg_spread,
    x='Date',
    y='RegionName',
    title='Number of Italian Regions to which COVID-19 spread over the time',
    color_discrete_sequence=[tpc, dth, rec],
)
fig.update_traces(textposition='top center')
fig.update_layout(uniformtext_minsize=5, uniformtext_mode='hide')
fig.show()

La vue d'ensemble des cas par région

In [140]:
# Per-region totals, largest confirmed-case count first.
# A list selects the columns; tuple selection on a groupby is deprecated since
# pandas 1.0 and rejected by later releases.
cl = data.groupby('RegionName')[['TotalPositiveCases', 'Deaths', 'Recovered']].sum()
cl = cl.reset_index().sort_values(by='TotalPositiveCases', ascending=False).reset_index(drop=True)
cl.head().style.background_gradient(cmap='rainbow')
Out[140]:
RegionName TotalPositiveCases Deaths Recovered
0 Lombardia 99940 16863 76248
1 Piemonte 32844 4146 27293
2 Emilia-Romagna 31805 4459 24478
3 Veneto 22864 2120 19399
4 Toscana 11785 1141 9141
In [141]:
# Derive the active count per region, then go to long form
# (one row per region and case state) for the grouped bar chart.
ncl = cl.assign(Active=cl['TotalPositiveCases'] - cl['Deaths'] - cl['Recovered'])
ncl = ncl.melt(
    id_vars="RegionName",
    value_vars=['Active', 'Recovered', 'Deaths', 'TotalPositiveCases'],
)

fig = px.bar(
    ncl.sort_values(['variable', 'value']),
    y="RegionName",
    x="value",
    color='variable',
    orientation='h',
    height=800,
    title='Number and state of Cases by Region',
    color_discrete_sequence=[act, dth, rec, tpc],
)
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_traces(opacity=0.6)
fig.show()

Top 5 des régions par catégorie

In [50]:
# dgm is an alias of data_grouped_moreinfo, not a copy: the columns that later
# cells add to dgm ('Mortality Rate', 'Hospitalization Rate') are therefore
# added to data_grouped_moreinfo as well.
dgm = data_grouped_moreinfo

# Preview the first rows.
dgm.head()
Out[50]:
RegionName TotalPositiveCases Deaths Recovered Active TestsPerformed HomeConfinement HospitalizedPatients IntensiveCarePatients TotalHospitalizedPatients
0 Abruzzo 3773 472 2872 429 101829.0 391 37 1 38
1 Basilicata 524 28 408 88 55211.0 86 1 1 2
2 Calabria 1477 97 1148 232 152885.0 215 17 0 17
3 Campania 6882 445 4412 2025 233301.0 1911 110 4 114
4 Emilia-Romagna 31805 4459 24478 2868 524656.0 2758 99 11 110
In [51]:
# Five regions with the highest confirmed-case totals, re-sorted ascending for the chart.
top_regions = dgm.sort_values('TotalPositiveCases', ascending=False).head(5)
fig = px.bar(
    top_regions.sort_values('TotalPositiveCases', ascending=True),
    x="TotalPositiveCases",
    y="RegionName",
    title='Total Positive Cases',
    text='TotalPositiveCases',
    orientation='h',
    width=700,
    height=700,
    range_x=[0, max(dgm['TotalPositiveCases']) + 10000],
)
fig.update_traces(marker_color=tpc, opacity=0.6, textposition='outside')
fig.show()
In [52]:
# Five regions with the most deaths, re-sorted ascending for the chart.
top_regions = dgm.sort_values('Deaths', ascending=False).head(5)
fig = px.bar(
    top_regions.sort_values('Deaths', ascending=True),
    x="Deaths",
    y="RegionName",
    title='Deaths',
    text='Deaths',
    orientation='h',
    width=700,
    height=700,
    range_x=[0, max(dgm['Deaths']) + 5000],
)
fig.update_traces(marker_color=dth, opacity=0.6, textposition='outside')
fig.show()
In [53]:
# Five regions with the most recoveries, re-sorted ascending for the chart.
top_regions = dgm.sort_values('Recovered', ascending=False).head(5)
fig = px.bar(
    top_regions.sort_values('Recovered', ascending=True),
    x="Recovered",
    y="RegionName",
    title='Recovered',
    text='Recovered',
    orientation='h',
    width=700,
    height=700,
    range_x=[0, max(dgm['Recovered']) + 10000],
)
fig.update_traces(marker_color=rec, opacity=0.6, textposition='outside')
fig.show()
In [54]:
# Five regions with the most active cases, re-sorted ascending for the chart.
top_regions = dgm.sort_values('Active', ascending=False).head(5)
fig = px.bar(
    top_regions.sort_values('Active', ascending=True),
    x="Active",
    y="RegionName",
    title='Currently Active',
    text='Active',
    orientation='h',
    width=700,
    height=700,
    range_x=[0, max(dgm['Active']) + 10000],
)
fig.update_traces(marker_color=act, opacity=0.6, textposition='outside')
fig.show()
In [55]:
# Mortality rate = deaths per 100 confirmed cases, rounded to 2 decimals.
# The column is written onto dgm itself. Only regions with more than
# 500 confirmed cases take part in the ranking.
dgm['Mortality Rate'] = round(dgm['Deaths'] / dgm['TotalPositiveCases'] * 100, 2)
temp = dgm[dgm['TotalPositiveCases'] > 500].sort_values('Mortality Rate', ascending=False)

top_mortality = temp.sort_values('Mortality Rate', ascending=False).head(5)
fig = px.bar(
    top_mortality.sort_values('Mortality Rate', ascending=True),
    x="Mortality Rate",
    y="RegionName",
    text='Mortality Rate',
    orientation='h',
    width=700,
    height=600,
    range_x=[0, 20],
    title='Mortality Rate (No. of Deaths Per 100 Confirmed Case)',
)
fig.update_traces(marker_color=dth, opacity=0.6, textposition='outside')
fig.show()
In [56]:
# Five regions with the most hospitalized patients, re-sorted ascending for the chart.
top_regions = dgm.sort_values('TotalHospitalizedPatients', ascending=False).head(5)
fig = px.bar(
    top_regions.sort_values('TotalHospitalizedPatients', ascending=True),
    x="TotalHospitalizedPatients",
    y="RegionName",
    title='TotalHospitalizedPatients',
    text='TotalHospitalizedPatients',
    orientation='h',
    width=700,
    height=700,
    range_x=[0, max(dgm['TotalHospitalizedPatients']) + 2500],
)
fig.update_traces(marker_color=hos, opacity=0.6, textposition='outside')
fig.show()
In [57]:
# Hospitalization rate = hospitalized patients per 100 confirmed cases,
# rounded to 2 decimals. The column is written onto dgm itself.
# Only regions with more than 100 confirmed cases take part in the ranking.
dgm['Hospitalization Rate'] = round((dgm['TotalHospitalizedPatients']/dgm['TotalPositiveCases'])*100, 2)
temp = dgm[dgm['TotalPositiveCases']>100]
# Rank on the metric this cell plots. Sorting on 'Mortality Rate' here would
# make the cell depend on a column that exists only if another cell has run.
temp = temp.sort_values('Hospitalization Rate', ascending=False)

fig = px.bar(temp.head(5).sort_values('Hospitalization Rate', ascending=True), 
             x="Hospitalization Rate", y="RegionName", text='Hospitalization Rate', orientation='h', 
             width=700, height=600, range_x = [0, 100], title='Hospitalization Rate (No. of TotalHospitalizedPatients Per 100 Confirmed Case)')
fig.update_traces(marker_color=hos, opacity=0.6, textposition='outside')
fig.show()
In [58]:
# Five regions with the most people in home confinement, re-sorted ascending for the chart.
top_regions = dgm.sort_values('HomeConfinement', ascending=False).head(5)
fig = px.bar(
    top_regions.sort_values('HomeConfinement', ascending=True),
    x="HomeConfinement",
    y="RegionName",
    title='Home Confinement',
    text='HomeConfinement',
    orientation='h',
    width=700,
    height=700,
    range_x=[0, max(dgm['HomeConfinement']) + 5000],
)
fig.update_traces(marker_color=act, opacity=0.6, textposition='outside')
fig.show()
In [227]:
# Five regions with the most tests performed, re-sorted ascending for the chart.
top_regions = dgm.sort_values('TestsPerformed', ascending=False).head(5)
fig = px.bar(
    top_regions.sort_values('TestsPerformed', ascending=True),
    x="TestsPerformed",
    y="RegionName",
    title='Tests Performed (tests)',
    text='TestsPerformed',
    orientation='h',
    width=700,
    height=700,
    range_x=[0, max(dgm['TestsPerformed']) + 80000],
)
fig.update_traces(marker_color='purple', opacity=0.6, textposition='outside')
fig.show()

Les Cas par million d'habitants

In [60]:
# Attach each region's population to its case counts.
temp = pd.merge(dgm_2, pop_reg, how='left', right_on='RegionName', left_on='RegionName')
# .copy() makes the column subset an independent frame, so the new column
# below is not assigned onto a slice (avoids SettingWithCopyWarning).
temp = temp[['RegionName', 'TotalPositiveCases', 'Deaths', 'Recovered', 'Active', 'Population']].copy()

# Confirmed cases per million inhabitants, rounded to 2 decimals.
temp['TotalPositiveCases Per Million Inhabitants'] = round(temp['TotalPositiveCases']/temp['Population']*1000000, 2)

# NOTE(review): head(20) keeps the first 20 rows in dgm_2's order, not the 20
# largest values; with more than 20 regions the trailing rows are silently
# left out of the chart. Confirm this is intended.
fig = px.bar(temp.head(20).sort_values('TotalPositiveCases Per Million Inhabitants', ascending=True), 
             x='TotalPositiveCases Per Million Inhabitants', y='RegionName', orientation='h', 
             width=1000, height=700, text='TotalPositiveCases Per Million Inhabitants', title='Total Positive Cases cases Per Million Inhabitants',
             range_x = [0, max(temp['TotalPositiveCases Per Million Inhabitants'])+2500])
fig.update_traces(textposition='outside', marker_color=dth, opacity=0.7)
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
In [61]:
# Attach each region's population to its hospitalization counts.
temp = pd.merge(dgm_2, pop_reg, how='left', right_on='RegionName', left_on='RegionName')
# .copy() makes the column subset an independent frame, so the new columns
# below are not assigned onto a slice (avoids SettingWithCopyWarning).
temp = temp[['RegionName', 'TotalPositiveCases', 'Deaths', 'Recovered', 'Active', 'Population','IntensiveCarePatients','HospitalizedPatients','TotalHospitalizedPatients']].copy()

# Hospitalized patients per million inhabitants, split by ICU / non-ICU.
temp['Hospitalized not in ICU Per Million Inhabitants'] = round(temp['HospitalizedPatients']/temp['Population']*1000000, 2)
temp['Hospitalized in ICU Per Million Inhabitants'] = round(temp['IntensiveCarePatients']/temp['Population']*1000000, 2)

# Long form: one row per region and hospitalization category.
temp = temp.melt(id_vars='RegionName', value_vars=['Hospitalized not in ICU Per Million Inhabitants', 'Hospitalized in ICU Per Million Inhabitants'], 
                 var_name='Hospitalized cases per Million Inhabitants', value_name='Value')

fig = px.bar(temp.sort_values('Value', ascending=True),
             x="Value", y="RegionName", color='Hospitalized cases per Million Inhabitants', orientation='h', 
             title='Hospitalized Cases Per Million Inhabitants',
             color_discrete_sequence=['saddlebrown', 'sandybrown'],
             height=1000,
             text='Value',
             range_x = [0, max(temp['Value'])+500]
             )
fig.update_traces(textposition='outside', opacity=0.7)
fig.update_layout(barmode='stack')
fig.update_layout(uniformtext_minsize=11, uniformtext_mode='hide')
fig.update_layout(legend_orientation="h", legend_title='')
fig.show()

Day by day

Throughout Italy

In [62]:
# Nationwide new positive cases per day; dates are shown as "day month" labels.
temp = Data_Byregion.groupby('Date')['NewPositiveCases'].sum().reset_index()
temp['Date'] = pd.to_datetime(temp['Date']).dt.strftime('%d %b')

fig = px.bar(
    temp,
    x="NewPositiveCases",
    y="Date",
    orientation='h',
    height=800,
    text='NewPositiveCases',
    title='N. of New Positive Cases in Italy for each day',
    range_x=[0, max(temp['NewPositiveCases']) + 1000],
)
fig.update_layout(xaxis_title='Newly Positive Cases')
fig.update_traces(marker_color=act, opacity=0.6, textposition='outside')
fig.show()
In [63]:
# Nationwide totals per date. A list selects the columns; tuple selection on a
# groupby is deprecated since pandas 1.0 and rejected by later releases.
temp = Data_Byregion.groupby('Date')[['TotalPositiveCases', 'Deaths', 'Recovered']].sum().reset_index()
# A single reset_index is enough; a second one would add a stray 'index' column.
temp = temp.sort_values(by='TotalPositiveCases', ascending=True).reset_index(drop=True)

# Derive the active count, then go to long form with "day month" date labels.
ntemp = temp.copy()
ntemp['Active'] = ntemp['TotalPositiveCases'] - ntemp['Deaths'] - ntemp['Recovered']
ntemp = ntemp.melt(id_vars="Date", value_vars=['Active', 'Recovered', 'Deaths'])
ntemp['Date'] = pd.to_datetime(ntemp['Date'])
ntemp['Date'] = ntemp['Date'].dt.strftime('%d %b')

fig = px.bar(ntemp.sort_values(['variable', 'value']), 
             y="Date", x="value", color='variable', orientation='h', height=1200,
             title='Total N. of Active, Deceased and Recovered cases in Italy', color_discrete_sequence=[act, dth, rec])
fig.update_yaxes(categoryorder = "total ascending")
fig.update_layout(xaxis_title='Value')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_traces(opacity=0.6)
fig.show()

By Region

In [64]:
# Totals per region and date. A list selects the columns; tuple selection on a
# groupby is deprecated since pandas 1.0 and rejected by later releases.
temp = Data_Byregion.groupby(['RegionName', 'Date'])[['TotalPositiveCases', 'Deaths', 'Recovered']].sum()
temp = temp.reset_index()
# Dates are shown as "day month" labels.
temp['Date'] = pd.to_datetime(temp['Date'])
temp['Date'] = temp['Date'].dt.strftime('%d %b')

fig = px.bar(temp, x="TotalPositiveCases", y="Date", color='RegionName', orientation='h', height=1200,
             title='Total N. of Confirmed cases')
fig.show()
In [66]:
# Day-over-day change of the totals per region. A list selects the columns;
# tuple selection on a groupby is deprecated since pandas 1.0 and rejected by
# later releases.
count_columns = ['TotalPositiveCases', 'Deaths', 'Recovered']
temp = Data_Byregion.groupby(['RegionName', 'Date'])[count_columns].sum().diff().reset_index()

# The first row of each region holds a difference against the previous
# region's last row, so it is blanked out.
mask = temp['RegionName'] != temp['RegionName'].shift(1)
temp.loc[mask, count_columns] = np.nan

# Dates are shown as "day month" labels.
temp['Date'] = pd.to_datetime(temp['Date'])
temp['Date'] = temp['Date'].dt.strftime('%d %b')

fig = px.bar(temp, x="TotalPositiveCases", y="Date", color='RegionName', orientation='h', height = 1200,
             title='New  Positive Cases cases every day')
fig.show()
In [67]:
# Totals per region and date. A list selects the columns; tuple selection on a
# groupby is deprecated since pandas 1.0 and rejected by later releases.
temp = Data_Byregion.groupby(['RegionName', 'Date'])[['TotalPositiveCases', 'Deaths', 'Recovered']].sum()
temp = temp.reset_index()
# Dates are shown as "day month" labels.
temp['Date'] = pd.to_datetime(temp['Date'])
temp['Date'] = temp['Date'].dt.strftime('%d %b')

fig = px.bar(temp, x="Deaths", y="Date", color='RegionName', orientation='h', height=1200,
             title='Total N. of Deaths')
fig.show()
In [211]:
# Day-over-day change of the totals per region. A list selects the columns;
# tuple selection on a groupby is deprecated since pandas 1.0 and rejected by
# later releases.
count_columns = ['TotalPositiveCases', 'Deaths', 'Recovered']
temp = Data_Byregion.groupby(['RegionName', 'Date'])[count_columns].sum().diff().reset_index()

# The first row of each region holds a difference against the previous
# region's last row, so it is blanked out.
mask = temp['RegionName'] != temp['RegionName'].shift(1)
temp.loc[mask, count_columns] = np.nan

# Dates are shown as "day month" labels.
temp['Date'] = pd.to_datetime(temp['Date'])
temp['Date'] = temp['Date'].dt.strftime('%d %b')

fig = px.bar(temp, x="Deaths", y="Date", color='RegionName', orientation='h', height=1200,
             title='New Deaths every day')
fig.show()
In [70]:
# Confirmed cases per date and region, one animation frame per date.
temp = Data_Byregion.groupby(['Date', 'RegionName'])['TotalPositiveCases'].sum().reset_index()
temp['Date'] = pd.to_datetime(temp['Date'])
# Sort on real datetimes and only then format: sorting the '%m/%d/%Y' strings
# orders rows by month and day first, which misplaces dates from another year.
temp = temp.sort_values(by='Date')
temp['Date'] = temp['Date'].dt.strftime('%m/%d/%Y')

fig = px.bar(temp, y='RegionName', x='TotalPositiveCases', color='RegionName', orientation='h',  
             title='Total Positive Cases cases over time', animation_frame='Date', height=1000, 
             range_x=[0, max(temp['TotalPositiveCases']+5000)],
             text='TotalPositiveCases')
fig.update_traces(textposition='outside')
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
In [202]:
# Deaths per date and region, one animation frame per date.
temp = Data_Byregion.groupby(['Date', 'RegionName'])['Deaths'].sum().reset_index()
temp['Date'] = pd.to_datetime(temp['Date'])
# Sort on real datetimes and only then format: sorting the '%m/%d/%Y' strings
# orders rows by month and day first, which misplaces dates from another year.
temp = temp.sort_values(by='Date')
temp['Date'] = temp['Date'].dt.strftime('%m/%d/%Y')

fig = px.bar(temp, y='RegionName', x='Deaths', color='RegionName', orientation='h',  
             title='Deaths Cases cases over time', animation_frame='Date', height=1000, 
             range_x=[0, max(temp['Deaths']+5000)],
             text='Deaths')
fig.update_traces(textposition='outside')
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
In [204]:
# Recoveries per date and region, one animation frame per date.
temp = Data_Byregion.groupby(['Date', 'RegionName'])['Recovered'].sum().reset_index()
temp['Date'] = pd.to_datetime(temp['Date'])
# Sort on real datetimes and only then format: sorting the '%m/%d/%Y' strings
# orders rows by month and day first, which misplaces dates from another year.
temp = temp.sort_values(by='Date')
temp['Date'] = temp['Date'].dt.strftime('%m/%d/%Y')

fig = px.bar(temp, y='RegionName', x='Recovered', color='RegionName', orientation='h',  
             title='Recovered Cases cases over time', animation_frame='Date', height=1000, 
             range_x=[0, max(temp['Recovered']+5000)],
             text='Recovered')
fig.update_traces(textposition='outside')
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
In [207]:
# Active cases per date and region, one animation frame per date.
temp = Data_Byregion.groupby(['Date', 'RegionName'])['Active'].sum().reset_index()
temp['Date'] = pd.to_datetime(temp['Date'])
# Sort on real datetimes and only then format: sorting the '%m/%d/%Y' strings
# orders rows by month and day first, which misplaces dates from another year.
temp = temp.sort_values(by='Date')
temp['Date'] = temp['Date'].dt.strftime('%m/%d/%Y')

fig = px.bar(temp, y='RegionName', x='Active', color='RegionName', orientation='h',  
             title='Active cases over time', animation_frame='Date', height=1000, 
             range_x=[0, max(temp['Active']+2000)],
             text='Active')
fig.update_traces(textposition='outside')
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
In [206]:
# Confirmed cases per date and province, one animation frame per date.
temp = Data_Byprovince.groupby(['Date', 'ProvinceName'])['TotalPositiveCases'].sum().reset_index()
temp['Date'] = pd.to_datetime(temp['Date'])
# Sort on real datetimes and only then format: sorting the '%m/%d/%Y' strings
# orders rows by month and day first, which misplaces dates from another year.
temp = temp.sort_values(by='Date')
temp['Date'] = temp['Date'].dt.strftime('%m/%d/%Y')

fig = px.bar(temp, y='ProvinceName', x='TotalPositiveCases', color='ProvinceName', orientation='h',  
             title='Total Positive Cases cases over time Per Province', animation_frame='Date', height=1000, 
             range_x=[0, max(temp['TotalPositiveCases']+5000)],
             text='TotalPositiveCases')
fig.update_traces(textposition='outside')
fig.update_layout(yaxis={'categoryorder':'total ascending'})
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()
In [71]:
# Confirmed cases per date and region, one line per region;
# dates are shown as "day month" labels.
temp = Data_Byregion.groupby(['Date', 'RegionName'])['TotalPositiveCases'].sum().reset_index()
temp['Date'] = pd.to_datetime(temp['Date']).dt.strftime('%d %b')
px.line(
    temp,
    x="Date",
    y="TotalPositiveCases",
    color='RegionName',
    title='Cases Spread',
    height=600,
)
In [72]:
# Deaths against confirmed cases per region, both axes logarithmic.
# temp is the data_grouped frame itself, not a copy.
temp = data_grouped
fig = px.scatter(
    temp,
    x='TotalPositiveCases',
    y='Deaths',
    color='RegionName',
    text='RegionName',
    log_x=True,
    log_y=True,
    title='Deaths vs TotalPositiveCases',
)
fig.update_traces(textposition='top center')
fig.show()

Composition des cas par région

In [142]:
# One treemap per metric, each built from `data` sorted by that metric
# in descending order. Entries are (column, chart title).
treemap_specs = [
    ('TotalPositiveCases', 'Number of Total Positive  Cases'),
    ('Deaths', 'Number of Deaths reported'),
    ('Recovered', 'Number of Recovered reported'),
    ('Active', 'Number of Active reported'),
]

for metric, chart_title in treemap_specs:
    ranked = data.sort_values(by=metric, ascending=False).reset_index(drop=True)
    fig = px.treemap(
        ranked,
        path=["RegionName"],
        values=metric,
        height=700,
        title=chart_title,
        color_discrete_sequence=px.colors.qualitative.Prism,
    )
    fig.data[0].textinfo = 'label+text+value'
    fig.show()

Composition des cas par province

In [143]:
# Treemap of confirmed cases per province, largest first.
ranked_provinces = data_grouped_province.sort_values(
    by='TotalPositiveCases', ascending=False
).reset_index(drop=True)
fig = px.treemap(
    ranked_provinces,
    path=["ProvinceName"],
    values="TotalPositiveCases",
    height=700,
    title='Number of Total Positive Cases reported Per Province',
    color_discrete_sequence=px.colors.qualitative.Prism,
)
fig.data[0].textinfo = 'label+text+value'
fig.show()

Durée de l'épidémie

Remarque:

Dans le graphique, le dernier jour est indiqué comme un jour après la dernière notification d'un nouveau cas confirmé.

In [75]:
from datetime import timedelta

# first date
# ----------
# First date on which each region reports a positive case count. The filtered
# rows are copied so the Date conversion does not write onto a slice of
# Data_Byregion.
positive_rows = Data_Byregion[Data_Byregion['TotalPositiveCases']>0].copy()
positive_rows['Date'] = pd.to_datetime(positive_rows['Date'])
first_date = positive_rows.groupby('RegionName')['Date'].agg(['min']).reset_index()

# last date
# ---------
# Note: this converts the Date column of Data_Byregion itself, not of a copy.
Data_Byregion['Date'] = pd.to_datetime(Data_Byregion['Date'])

# Day-over-day change of the totals per region. A list selects the columns;
# tuple selection on a groupby is deprecated since pandas 1.0 and rejected by
# later releases.
count_columns = ['TotalPositiveCases', 'Deaths', 'Recovered']
last_date = Data_Byregion.groupby(['RegionName', 'Date'])[count_columns].sum().diff().reset_index()

# The first row of each region holds a difference against the previous
# region's last row, so it is blanked out.
mask = last_date['RegionName'] != last_date['RegionName'].shift(1)
last_date.loc[mask, count_columns] = np.nan

# Last date on which each region's confirmed count increased.
last_date = last_date[last_date['TotalPositiveCases']>0]
last_date = last_date.groupby('RegionName')['Date'].agg(['max']).reset_index()

# first_last
# ----------
# Join on RegionName rather than by row position, so a region present in only
# one of the two frames cannot shift the others out of alignment.
first_last = first_date.merge(last_date, on='RegionName', how='inner')

# added 1 more day, which will show the next day as the day on which last case appeared
first_last['max'] = first_last['max'] + timedelta(days=1)

# no. of days
first_last['Days'] = first_last['max'] - first_last['min']

# task column as region name
first_last['Task'] = first_last['RegionName']

# rename columns to the Start / Finish / Task names passed to create_gantt
first_last = first_last.rename(columns={'min': 'Start', 'max': 'Finish'})

# sort by no. of days
first_last = first_last.sort_values('Days')

# visualization
# --------------

# produce one random color per region
clr = ["#"+''.join([random.choice('0123456789ABC') for _ in range(6)]) for _ in range(len(first_last))]

#plot
fig = ff.create_gantt(first_last, index_col='RegionName', colors=clr, show_colorbar=False, 
                      bar_width=0.2, showgrid_x=True, showgrid_y=True, height=500, 
                      title='Gantt Chart')
fig.show()

Region Wise

Confirmed cases

In [76]:
# Confirmed cases per date and region, one small panel per region.
temp = (
    Data_Byregion.groupby(['Date', 'RegionName'])['TotalPositiveCases']
    .sum()
    .reset_index()
    .sort_values(by=['Date', 'RegionName'])
)

plt.style.use('seaborn')
g = sns.FacetGrid(
    temp,
    col="RegionName",
    hue="RegionName",
    sharey=False,
    col_wrap=4,
)
g = g.map(plt.plot, "Date", "TotalPositiveCases")
g.set_xticklabels(rotation=90)
plt.show()

Ln(TotalPositiveCases)

In [77]:
temp = Data_Byregion.copy()

# Natural log of the confirmed count. Zero counts are masked to NaN first:
# np.log(0) would yield -inf together with a RuntimeWarning.
positive_cases = temp['TotalPositiveCases'].where(temp['TotalPositiveCases'] > 0)
temp['LnTotalPositiveCases'] = np.log(positive_cases)
# min_count=1 keeps a group whose values are all NaN as NaN instead of 0.
temp = temp.groupby(['Date', 'RegionName'])['LnTotalPositiveCases'].sum(min_count=1)
temp = temp.reset_index().sort_values(by=['Date', 'RegionName'])


plt.style.use('seaborn')
g = sns.FacetGrid(temp, col="RegionName", hue="RegionName", 
                  sharey=False, col_wrap=4)
g = g.map(plt.plot, "Date", "LnTotalPositiveCases")
g.set_xticklabels(rotation=90)
plt.show()

New Positive Cases

In [91]:
# Day-over-day change of the totals per region. A list selects the columns;
# tuple selection on a groupby is deprecated since pandas 1.0 and rejected by
# later releases.
count_columns = ['TotalPositiveCases', 'Deaths', 'Recovered']
temp = Data_Byregion.groupby(['RegionName', 'Date'])[count_columns].sum().diff().reset_index()

# The first row of each region holds a difference against the previous
# region's last row, so it is blanked out.
mask = temp['RegionName'] != temp['RegionName'].shift(1)
temp.loc[mask, count_columns] = np.nan

plt.style.use('seaborn')
g = sns.FacetGrid(temp, col="RegionName", hue="RegionName", 
                  sharey=False, col_wrap=4)
g = g.map(sns.lineplot, "Date", "TotalPositiveCases")
g.set_xticklabels(rotation=90)
plt.show()

Calendar map

In [79]:
# Parse the Date column into datetimes, in place on Data_Byregion; the
# calendar plots that follow group on this column.
Data_Byregion['Date'] = pd.to_datetime(Data_Byregion['Date'])

Nombre de nouveaux cas confirmés chaque jour

In [80]:
# Day-over-day change of the nationwide confirmed total, drawn as a calendar heatmap.
temp = Data_Byregion.groupby('Date')['TotalPositiveCases'].sum().diff()

plt.figure(figsize=(20, 5))
ax = calmap.yearplot(
    temp,
    fillcolor='white',
    cmap='Oranges',
    linewidth=0.5,
)

Nombre de nouveaux décès chaque jour

In [81]:
# Day-over-day change of the nationwide death total, drawn as a calendar heatmap.
temp = Data_Byregion.groupby('Date')['Deaths'].sum().diff()

plt.figure(figsize=(20, 5))
ax = calmap.yearplot(
    temp,
    fillcolor='white',
    cmap='Reds',
    linewidth=0.5,
)

Nombre de régions nouvellement touchées chaque jour

In [82]:
# Day-over-day change in the number of distinct regions with a non-zero
# case count, drawn as a calendar heatmap.
regions_with_cases = Data_Byregion[Data_Byregion['TotalPositiveCases'] != 0]
spread = regions_with_cases.groupby('Date')['RegionName'].nunique(dropna=False).diff()

plt.figure(figsize=(20, 5))
ax = calmap.yearplot(
    spread,
    fillcolor='white',
    cmap='Greens',
    linewidth=0.5,
)
In [83]:
# Italy_Province
# Folium map with one circle per row of Data_Byprovince; the circle radius is
# the row's TotalPositiveCases value.
# NOTE(review): if Data_Byprovince holds several dates per province, every
# date adds its own circle at the same coordinates. Confirm this is intended.
m_province = folium.Map(location=[41.8719, 12.5674],
               min_zoom=5, max_zoom=10, zoom_start=5)

for i in range(0, len(Data_Byprovince)):
    # Fetch the row once instead of re-building it for every field.
    row = Data_Byprovince.iloc[i]
    folium.Circle(
        location=[row['Latitude'], row['Longitude']],
        color='crimson', 
        fill = True,
        fill_color='crimson',
        tooltip =   "<div style='margin: 0; background-color: black; color: white;'>"+
                    '<li><bold>Country : '+str(row['Country'])+
                    '<li><bold>RegionName : '+str(row['RegionName'])+
                    '<li><bold>ProvinceName : '+str(row['ProvinceName'])+
                    '<li><bold>TotalPositiveCases : '+str(row['TotalPositiveCases']),
                radius=int(row['TotalPositiveCases'])).add_to(m_province)
m_province.save('m_province.html')

m_province
Out[83]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [84]:
# Box plot of the TotalPositiveCases values of three regions.
plt.figure(figsize=(10, 5), dpi=100)
plt.style.use('default')

region_names = Data_Byregion['RegionName']
Lazio = Data_Byregion.loc[region_names == "Lazio", 'TotalPositiveCases']
Veneto = Data_Byregion.loc[region_names == "Veneto", 'TotalPositiveCases']
Lombardia = Data_Byregion.loc[region_names == "Lombardia", 'TotalPositiveCases']

bp = plt.boxplot(
    [Lazio, Veneto, Lombardia],
    labels=['Lazio', 'Veneto', "Lombardia"],
    patch_artist=True,
)

plt.title('Total Positive Cases Region Comparison')
plt.ylabel('Total Positive Cases Per Region')
plt.xlabel('Region Name')

for patch in bp['boxes']:
    # Set the edge color first, then override the fill color.
    patch.set(color='#4286f4', linewidth=2)
    patch.set(facecolor='#e0e0e0')

plt.show()
In [85]:
# Box plot of the Recovered values of three regions.
plt.figure(figsize=(10, 5), dpi=100)
plt.style.use('default')

region_names = Data_Byregion['RegionName']
Lazio = Data_Byregion.loc[region_names == "Lazio", 'Recovered']
Veneto = Data_Byregion.loc[region_names == "Veneto", 'Recovered']
Lombardia = Data_Byregion.loc[region_names == "Lombardia", 'Recovered']

bp = plt.boxplot(
    [Lazio, Veneto, Lombardia],
    labels=['Lazio', 'Veneto', "Lombardia"],
    patch_artist=True,
)

plt.title('Recovered Cases Region Comparison')
plt.ylabel('Recovered Per Region')
plt.xlabel('Region Name')

for patch in bp['boxes']:
    # Set the edge color first, then override the fill color.
    patch.set(color='#4286f4', linewidth=2)
    patch.set(facecolor='#e0e0e0')

plt.show()
In [86]:
# Box plot of the Deaths values of three regions.
plt.figure(figsize=(10, 5), dpi=100)
plt.style.use('default')

region_names = Data_Byregion['RegionName']
Lazio = Data_Byregion.loc[region_names == "Lazio", 'Deaths']
Veneto = Data_Byregion.loc[region_names == "Veneto", 'Deaths']
Lombardia = Data_Byregion.loc[region_names == "Lombardia", 'Deaths']

bp = plt.boxplot(
    [Lazio, Veneto, Lombardia],
    labels=['Lazio', 'Veneto', "Lombardia"],
    patch_artist=True,
)

plt.title('Deaths Cases Region Comparison')
plt.ylabel('Deaths Cases Per Region')
plt.xlabel('Region Name')

for patch in bp['boxes']:
    # Set the edge color first, then override the fill color.
    patch.set(color='#4286f4', linewidth=2)
    patch.set(facecolor='#e0e0e0')

plt.show()
In [175]:
# Re-seeds NumPy's global RNG. Nothing in this cell draws from it; the call is
# kept so the RNG is still seeded here for any other cell that uses it.
np.random.seed(19680801)

plt.rcdefaults()
fig, ax = plt.subplots()

# Load the data
Region = data['RegionName']
TotalPositiveCases = data['TotalPositiveCases']

# Plain horizontal bars: no error bars are drawn, because the data carries no
# uncertainty estimate and random numbers must not be shown as one.
ax.barh(Region, TotalPositiveCases, align='center', color='b')
ax.set_yticks(Region)
ax.set_yticklabels(Region)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('TotalPositiveCases')
ax.set_ylabel('Name of Region ')
ax.set_title('Distribution Total Positive Cases By Region')

plt.show()
In [176]:
# Re-seeds NumPy's global RNG. Nothing in this cell draws from it; the call is
# kept so the RNG is still seeded here for any other cell that uses it.
np.random.seed(19680801)

plt.rcdefaults()
fig, ax = plt.subplots()

# Load the data
Region = data['RegionName']
Deaths = data['Deaths']

# Plain horizontal bars: no error bars are drawn, because the data carries no
# uncertainty estimate and random numbers must not be shown as one.
ax.barh(Region, Deaths, align='center', color='Red')
ax.set_yticks(Region)
ax.set_yticklabels(Region)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('Deaths')
ax.set_ylabel('Name of Region ')
ax.set_title('Distribution Deaths By Region')

plt.show()
In [178]:
# Re-seeds NumPy's global RNG. Nothing in this cell draws from it; the call is
# kept so the RNG is still seeded here for any other cell that uses it.
np.random.seed(19680801)

plt.rcdefaults()
fig, ax = plt.subplots()

# Load the data
Region = data['RegionName']
Recovered = data['Recovered']

# Plain horizontal bars: no error bars are drawn, because the data carries no
# uncertainty estimate and random numbers must not be shown as one.
ax.barh(Region, Recovered, align='center', color='Green')
ax.set_yticks(Region)
ax.set_yticklabels(Region)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('Recovered')
ax.set_ylabel('Name of Region ')
ax.set_title('Distribution Recovered By Region')

plt.show()
In [180]:
# Re-seeds NumPy's global RNG. Nothing in this cell draws from it; the call is
# kept so the RNG is still seeded here for any other cell that uses it.
np.random.seed(19680801)

plt.rcdefaults()
fig, ax = plt.subplots()

# Load the data
Region = data['RegionName']
Active = data['Active']

# Plain horizontal bars: no error bars are drawn, because the data carries no
# uncertainty estimate and random numbers must not be shown as one.
ax.barh(Region, Active, align='center', color='Orange')
ax.set_yticks(Region)
ax.set_yticklabels(Region)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel('Active')
ax.set_ylabel('Name of Region ')
ax.set_title('Distribution Active By Region')

plt.show()
In [160]:
# Set the default figure size before plotting: the rc value is read when the
# figure is created, so setting it after the plot only affects later figures.
plt.rc('figure', figsize=(10, 5))
sns.barplot(x=Data_Byregion['RegionName'], y=Data_Byregion['TotalPositiveCases'])
plt.xticks(rotation=90, ha='right')
plt.title('Total Positive Cases By Region')
plt.show()
In [161]:
# Set the default figure size before plotting: the rc value is read when the
# figure is created, so setting it after the plot only affects later figures.
plt.rc('figure', figsize=(10, 5))
sns.barplot(x=Data_Byregion['RegionName'], y=Data_Byregion['Deaths'])
plt.xticks(rotation=90, ha='right')
plt.title('Number The Deaths By Region')
plt.show()
In [162]:
# Set the default figure size before plotting: the rc value is read when the
# figure is created, so setting it after the plot only affects later figures.
plt.rc('figure', figsize=(10, 5))
sns.barplot(x=Data_Byregion['RegionName'], y=Data_Byregion['Recovered'])
plt.xticks(rotation=90, ha='right')
plt.title('Number The Recovred By Region')
plt.show()
In [164]:
# Province bar plot: 'TotalPositiveCases' per 'ProvinceName' from Data_Byprovince.
# The 80 pt fonts below only make sense on the very large 160x80 inch canvas,
# so the size is fixed when the figure is created; setting rcParams after
# plotting would leave this figure at whatever size was active before.
fig, ax = plt.subplots(figsize=(160, 80))
sns.barplot(x=Data_Byprovince['ProvinceName'], y=Data_Byprovince['TotalPositiveCases'], ax=ax)

# Tick labels (both calls act on the current axes, i.e. `ax`).
plt.xticks(rotation=90, size=80, ha='right')
plt.yticks(size=80, ha='right')

ax.set_title('Total Positive Cases By Province', size=80)
ax.set_ylabel('Total Positive Cases', size=80)
ax.set_xlabel('Name Of Province', size=80)

plt.show()

Création de la base de données

In [165]:
import sqlite3

# Open the SQLite database file of the project (sqlite3 creates the file when
# it does not exist yet) and keep a cursor on that connection.
conn = sqlite3.connect(database='Projet_Lsd_Covid_19.db')
c = conn.cursor()

Connexion de la table by region avec la base des données

In [166]:
# Write Data_Byregion (index included) to table 'Covid_19_in_italy_Byregion'
# through the `conn` connection.
# if_exists='replace' keeps the cell re-runnable: the database file outlives the
# kernel, so with 'fail' every execution after the first raises ValueError
# because the table already exists.
Data_Byregion.to_sql('Covid_19_in_italy_Byregion', conn, if_exists='replace', index=True)

Connexion de la table by province avec la base des données

In [167]:
# Write Data_Byprovince (index included) to table 'Covid_19_in_italy_Byprovince'
# through the `conn` connection.
# if_exists='replace' keeps the cell re-runnable: the database file outlives the
# kernel, so with 'fail' every execution after the first raises ValueError
# because the table already exists.
Data_Byprovince.to_sql('Covid_19_in_italy_Byprovince', conn, if_exists='replace', index=True)

Pies Visualisation

Per Region

In [168]:
# Pie chart: share of each 'RegionName' in the 'TotalPositiveCases' column of `data`.
fig = px.pie(data, values=data['TotalPositiveCases'], names=data['RegionName'],
            title='Distribution of Total Positive Cases Per Region',
            )
# Print the percentage and the region label inside each slice.
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(
    template='plotly_white')
# No matplotlib rc change here: this is a plotly figure, so a matplotlib
# figsize setting would not resize it and would only inflate later
# matplotlib figures.
fig.show()
In [169]:
# Pie chart: share of each 'RegionName' in the 'Deaths' column of `data`.
fig = px.pie(
    data,
    names='RegionName',
    values='Deaths',
    title='Distribution of Deaths Per Region',
)
# Print the percentage and the region label inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.update_layout(template='plotly_white')
fig.show()
In [170]:
# Pie chart: share of each 'RegionName' in the 'Recovered' column of `data`.
fig = px.pie(
    data,
    names='RegionName',
    values='Recovered',
    title='Distribution of Recovered Per Region',
)
# Print the percentage and the region label inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.update_layout(template='plotly_white')
fig.show()
In [33]:
# Pie chart: share of each 'RegionName' in the 'Active' column of `data`.
fig = px.pie(
    data,
    names='RegionName',
    values='Active',
    title='Distribution of Active Per Region',
)
# Print the percentage and the region label inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.update_layout(template='plotly_white')
fig.show()

Per Province

In [172]:
# Pie chart: share of each 'ProvinceName' in the 'TotalPositiveCases' column
# of `data_grouped_province`.
fig = px.pie(
    data_grouped_province,
    names='ProvinceName',
    values='TotalPositiveCases',
    title='Distribution of Total Positive Cases Per Province',
)
# Print the percentage and the province label inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.update_layout(template='plotly_white')
fig.show()

Scatter Plots

Per Region

In [189]:
# Bubble chart: one marker per region, height and marker size both driven by
# 'TotalPositiveCases', coloured and hover-labelled by 'RegionName'.
fig = px.scatter(
    data,
    x='RegionName',
    y='TotalPositiveCases',
    size='TotalPositiveCases',
    color='RegionName',
    hover_name='RegionName',
    size_max=60,
)
fig.show()
In [190]:
# Bubble chart: one marker per region, height and marker size both driven by
# 'Deaths', coloured and hover-labelled by 'RegionName'.
fig = px.scatter(
    data,
    x='RegionName',
    y='Deaths',
    size='Deaths',
    color='RegionName',
    hover_name='RegionName',
    size_max=60,
)
fig.show()
In [197]:
# Bubble chart: one marker per region, height and marker size both driven by
# 'Recovered', coloured and hover-labelled by 'RegionName'.
fig = px.scatter(
    data,
    x='RegionName',
    y='Recovered',
    size='Recovered',
    color='RegionName',
    hover_name='RegionName',
    size_max=60,
)
fig.show()
In [192]:
# Bubble chart: one marker per region, height and marker size both driven by
# 'Active', coloured and hover-labelled by 'RegionName'.
fig = px.scatter(
    data,
    x='RegionName',
    y='Active',
    size='Active',
    color='RegionName',
    hover_name='RegionName',
    size_max=60,
)
fig.show()

Per Province

In [195]:
# Bubble chart: one marker per province, height and marker size both driven by
# 'TotalPositiveCases', coloured and hover-labelled by 'ProvinceName'.
fig = px.scatter(
    data_grouped_province,
    x='ProvinceName',
    y='TotalPositiveCases',
    size='TotalPositiveCases',
    color='ProvinceName',
    hover_name='ProvinceName',
    size_max=60,
)
fig.show()

Conclusion

  • Les premiers cas de coronavirus à propagation terrestre en Italie sont apparus dans les régions du nord de la Lombardie, de la Vénétie et de l'Émilie-Romagne le 20 février

  • La collecte des données a commencé le 24 février

  • Le 8 mars 2020, le Premier ministre Giuseppe Conte a étendu la quarantaine à toute la Lombardie et à 14 autres provinces du nord, et le lendemain à toute l'Italie, plaçant plus de 60 millions de personnes en quarantaine.

  • Le 11 mars 2020, PM Conte a interdit la quasi-totalité des activités commerciales à l'exception des supermarchés et des pharmacies.

  • Le 16 mars 2020, l'Italie est devenue le centre mondial des cas actifs de coronavirus avec deux fois plus de cas actifs de tout autre pays, y compris la Chine et l'Iran, combinés à 20603 cas actifs. Les USA ont pris le relais quelques semaines plus tard, le 11 avril.

  • Au 8 mai 2020, l'Italie comptait 87 961 cas actifs, l'un des nombres les plus élevés au monde. Dans l'ensemble, il y a eu 217 185 cas confirmés et 30 201 décès (un taux de mortalité d'environ 500 par million d'habitants), tandis qu'il y a eu 99 023 guérisons ou sorties d'hôpital.

  • Le 8 mai, l'Italie avait testé environ 1 610 000 personnes.

Étude Statistique

  • Pour connaître le nombre de cas à la date la plus récente, l'erreur serait de faire la somme de la colonne TotalPositiveCases sur toutes les dates, car il s'agit d'une donnée cumulée chaque jour !
  • Il faut donc extraire les données à la date souhaitée, puis réaliser les différents calculs.

  • On extrait les données à la date la plus récente

In [212]:
# Keep only the rows of Data_Byregion recorded on the most recent 'Date'.
# reset_index() renumbers the rows from 0 and keeps the former index as a column.
_is_latest_date = Data_Byregion['Date'] == max(Data_Byregion['Date'])
dataDerniereDate = Data_Byregion[_is_latest_date].reset_index()
In [213]:
# Sum of the 'TotalPositiveCases' column over every row of dataDerniereDate;
# the bare name on the last line displays the value as the cell output.
TotalPositiveCases = dataDerniereDate.loc[:, "TotalPositiveCases"].sum()
TotalPositiveCases
Out[213]:
268218
In [214]:
# Sum of the 'Recovered' column over every row of dataDerniereDate;
# the bare name on the last line displays the value as the cell output.
Recovered = dataDerniereDate.loc[:, "Recovered"].sum()
Recovered
Out[214]:
208536
In [215]:
# Sum of the 'Deaths' column over every row of dataDerniereDate;
# the bare name on the last line displays the value as the cell output.
Deaths = dataDerniereDate.loc[:, "Deaths"].sum()
Deaths
Out[215]:
35477

les statistiques de toute l'Italie :

In [216]:
# Aliases kept for reuse in the synthesis.
confirmes = TotalPositiveCases
gueris = Recovered
decedes = Deaths
# Deaths as a percentage of confirmed cases, rounded to two decimals.
mortalite = round((Deaths/TotalPositiveCases)*100,2)

# Country-level summary, one figure per line.
print(f"  Confirmes : {confirmes}")
print(f"  Gueris : {gueris}")
print(f"  Decedes : {decedes}")
print(f"  Taux mortalité (%): {mortalite}")
  Confirmes : 268218
  Gueris : 208536
  Decedes : 35477
  Taux mortalité (%): 13.23
In [217]:
# Descriptive statistics of the "Recovered" column of `data`:
# mean, variance, standard deviation, min, max, sum and median, printed in that order.
for _label, _value in (
    ("la moyenne est : ", data["Recovered"].mean()),
    ("la variance est :", data["Recovered"].var()),
    ("lécart type est :", data["Recovered"].std()),
    ("le minimum de la série est : ", min(data["Recovered"])),
    ("le maximum de la série est : ", max(data["Recovered"])),
    ("la somme des éléments de la série est :", sum(data["Recovered"])),
    ("la médiane est :", data["Recovered"].median()),
):
    print(_label, _value)
la moyenne est :  9930.285714285714
la variance est : 290316646.11428577
lécart type est : 17038.68087952485
le minimum de la série est :  408
le maximum de la série est :  76248
la somme des éléments de la série est : 208536
la médiane est : 4029.0
In [218]:
# Descriptive statistics of the "Deaths" column of `data`:
# mean, variance, standard deviation, min, max, sum and median, printed in that order.
for _label, _value in (
    ("la moyenne est : ", data["Deaths"].mean()),
    ("la variance est :", data["Deaths"].var()),
    ("lécart type est :", data["Deaths"].std()),
    ("le minimum de la série est : ", min(data["Deaths"])),
    ("le maximum de la série est : ", max(data["Deaths"])),
    ("la somme des éléments de la série est :", sum(data["Deaths"])),
    ("la médiane est :", data["Deaths"].median()),
):
    print(_label, _value)
la moyenne est :  1689.3809523809523
la variance est : 13637325.047619045
lécart type est : 3692.874902784962
le minimum de la série est :  23
le maximum de la série est :  16863
la somme des éléments de la série est : 35477
la médiane est : 445.0
In [219]:
# Descriptive statistics of the "TotalPositiveCases" column of `data`:
# mean, variance, standard deviation, min, max, sum and median, printed in that order.
for _label, _value in (
    ("la moyenne est : ", data["TotalPositiveCases"].mean()),
    ("la variance est :", data["TotalPositiveCases"].var()),
    ("lécart type est :", data["TotalPositiveCases"].std()),
    ("le minimum de la série est : ", min(data["TotalPositiveCases"])),
    ("le maximum de la série est : ", max(data["TotalPositiveCases"])),
    ("la somme des éléments de la série est :", sum(data["TotalPositiveCases"])),
    ("la médiane est :", np.median(data["TotalPositiveCases"])),
):
    print(_label, _value)
la moyenne est :  12772.285714285714
la variance est : 488863331.1142858
lécart type est : 22110.25398122522
le minimum de la série est :  524
le maximum de la série est :  99940
la somme des éléments de la série est : 268218
la médiane est : 5092.0

Description De La Table By Region

In [39]:
# Summary table (count, mean, std, min, quartiles, max) of the numeric columns of data_grouped.
data_grouped.describe()
Out[39]:
TotalPositiveCases Deaths Recovered Active
count 21.000000 21.000000 21.000000 21.000000
mean 12772.285714 1689.380952 9930.285714 1152.619048
std 22110.253981 3692.874903 17038.680880 1574.390278
min 524.000000 23.000000 408.000000 23.000000
25% 2114.000000 146.000000 1442.000000 232.000000
50% 5092.000000 445.000000 4029.000000 509.000000
75% 11043.000000 1141.000000 8827.000000 1405.000000
max 99940.000000 16863.000000 76248.000000 6829.000000
  • Dans cette description, nous allons faire une étude bivariée concernant les deux variables : "Total Positive Cases" et "Recovered" ; on prend 'Recovered' comme variable dépendante et 'TotalPositiveCases' comme variable explicative.
In [29]:
# Covariance matrix of TotalPositiveCases (x) and Recovered (y).
# x and y are kept as plain lists because later cells reuse them.
x, y = (list(data[_column]) for _column in ("TotalPositiveCases", "Recovered"))
np.cov(x, y)
Out[29]:
array([[4.88863331e+08, 3.76359622e+08],
       [3.76359622e+08, 2.90316646e+08]])
In [30]:
# Correlation-coefficient matrix of x and y; the off-diagonal entries are the
# correlation between the two variables.
np.corrcoef(x,y)
Out[30]:
array([[1.        , 0.99901802],
       [0.99901802, 1.        ]])
  • la donnée qui nous intéresse est donc en haut à droite et/ou en bas à gauche
  • Plus le coefficient est proche de 1, plus la relation linéaire positive entre les variables est forte.
  • Pour les coefficients de la droite des moindres carrés, on va utiliser la fonction linregress de scipy.stats.
  • Elle prend en arguments les deux listes de données X et Y et renvoie 5 nombres réels. Seuls les 3 premiers nous intéressent : le premier est le coefficient directeur de la droite, le deuxième est l'ordonnée à l'origine et le troisième est le coefficient de corrélation.
In [35]:
# Least-squares regression line of y on x: the result holds the slope, the
# intercept, the correlation coefficient (rvalue), the p-value and the
# standard error of the slope.
from scipy.stats import linregress
linregress(x,y)
Out[35]:
LinregressResult(slope=0.7698667457966912, intercept=97.32767504300318, rvalue=0.9990180226756431, pvalue=3.4670499884179204e-27, stderr=0.007832934981330633)
In [36]:
# Scatter plot (blue stars) of y against x, where x and y are the lists built
# from data["TotalPositiveCases"] and data["Recovered"] in the covariance cell.
# An explicit figure size keeps the plot independent of rc settings changed by
# other cells; labels and title let the figure stand alone.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(x, y, 'b*')
ax.set_xlabel('TotalPositiveCases')
ax.set_ylabel('Recovered')
ax.set_title('Recovered vs Total Positive Cases')
plt.show()
Out[36]:
[<matplotlib.lines.Line2D at 0x7fd442f6c210>,
 <matplotlib.lines.Line2D at 0x7fd442f6c450>]
In [225]:
# Export `data` to 'Data_Grouped.csv' in the current working directory
# (pandas writes the index as the first column by default).
data.to_csv('Data_Grouped.csv')
In [ ]:
data_